Introduction
This simulation models a plant population with multiple life history
traits using AlphaSimR. The model includes size-dependent reproductive
traits and exponential decay in QTL effect sizes to simulate realistic
genetic architecture.
Load Required Libraries
library(AlphaSimR)
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(ggdoctheme)
library(dplyr)
library(patchwork)
library(gridExtra)
library(knitr)
library(rstatix)
# Set seed for reproducibility
set.seed(123)
Simulation Parameters
# Population parameters
n_individuals = 1000
n_loci = 100
n_traits = 6
# Trait names
trait_names = c("Germination", "Initial_Size", "Establishment",
"Growth_Rate", "Flowering_Prob", "Fruit_Per_Plant")
# Heritabilities for each trait
h2 <- c(0.6, 0.5, 0.7, 0.4, 0.3, 0.4)
names(h2) <- trait_names
print("Simulation Parameters:")
## [1] "Simulation Parameters:"
print(paste("Number of individuals:", n_individuals))
## [1] "Number of individuals: 1000"
print(paste("Number of loci per trait:", n_loci))
## [1] "Number of loci per trait: 100"
print(paste("Number of traits:", n_traits))
## [1] "Number of traits: 6"
print("Heritabilities:")
## [1] "Heritabilities:"
print(h2)
## Germination Initial_Size Establishment Growth_Rate Flowering_Prob
## 0.6 0.5 0.7 0.4 0.3
## Fruit_Per_Plant
## 0.4
Genetic Architecture Setup
# Create founder genomes
founderPop = runMacs(nInd = 20, # Small founder population
nChr = 10, # 10 chromosomes
segSites = n_loci * n_traits / 10, # Distribute loci across chromosomes
species = "GENERIC")
# Set up simulation parameters
SP = SimParam$new(founderPop)
# Function to generate exponentially decaying effect sizes
generate_effects = function(n_loci, decay_rate = 0.1) {
# Generate effects with exponential decay
ranks = 1:n_loci
effects = exp(-decay_rate * (ranks - 1))
# Normalize so largest effect = 1
effects = effects / max(effects)
# Add some random sign variation
signs = sample(c(-1, 1), n_loci, replace = TRUE, prob = c(0.4, 0.6))
effects = effects * signs
return(effects)
}
# Generate QTL effects for each trait
qtl_effects = list()
for(i in 1:n_traits) {
qtl_effects[[i]] = generate_effects(n_loci, decay_rate = 0.08)
}
# Add traits to simulation parameters
for(i in 1:n_traits) {
SP$addTraitA(nQtlPerChr = n_loci/10, # Distribute across chromosomes
mean = 0,
var = 1,
corA = NULL,
gamma = FALSE,
shape = 1,
force = TRUE,
name = trait_names[i])
}
# Set heritabilities
SP$setVarE(h2 = h2)
print("Genetic architecture established with exponential decay in effect sizes")
## [1] "Genetic architecture established with exponential decay in effect sizes"
Generate F2 Population
# Create initial parents from founders
parents = newPop(founderPop, simParam = SP)
# Create F1 by random mating
F1 = randCross(parents, nCrosses = 500, simParam = SP)
# Create F2 population
F2 = randCross(F1, nCrosses = n_individuals, simParam = SP)
print(paste("Generated F2 population with", F2@nInd, "individuals"))
## [1] "Generated F2 population with 1000 individuals"
Compare Genetic Values vs Phenotypes in F2
# Create comparison data frame for F2
comparison_data = data.frame(
Individual = rep(1:nrow(gv_F2), n_traits),
Trait = rep(trait_names, each = nrow(gv_F2)),
Genetic_Value = as.vector(gv_F2),
Phenotype = as.vector(pheno_F2)
)
# Calculate correlations between genetic values and phenotypes for each trait
correlations = sapply(1:n_traits, function(i) {
cor(gv_F2[,i], pheno_F2[,i])
})
names(correlations) = trait_names
# Create summary table
correlation_summary = data.frame(
Trait = trait_names,
Heritability = h2,
GV_Pheno_Correlation = correlations,
Expected_Correlation = sqrt(h2) # Theoretical expectation
)
kable(correlation_summary, digits = 3,
caption = "Genetic Value vs Phenotype Correlations")
Genetic Value vs Phenotype Correlations
| Germination |
Germination |
0.6 |
0.805 |
0.775 |
| Initial_Size |
Initial_Size |
0.5 |
0.691 |
0.707 |
| Establishment |
Establishment |
0.7 |
0.826 |
0.837 |
| Growth_Rate |
Growth_Rate |
0.4 |
0.691 |
0.632 |
| Flowering_Prob |
Flowering_Prob |
0.3 |
0.518 |
0.548 |
| Fruit_Per_Plant |
Fruit_Per_Plant |
0.4 |
0.668 |
0.632 |
print("Correlations between genetic values and phenotypes:")
## [1] "Correlations between genetic values and phenotypes:"
print(round(correlations, 3))
## Germination Initial_Size Establishment Growth_Rate Flowering_Prob
## 0.805 0.691 0.826 0.691 0.518
## Fruit_Per_Plant
## 0.668
Visualization: Genetic Values vs Phenotypes
# Create scatter plots for each trait
plot_list = list()
for(i in 1:n_traits) {
trait_data_all = data.frame(
GV = gv_F2[,i],
Phenotype = pheno_F2[,i]
)
p = ggplot(trait_data_all, aes(x = GV, y = Phenotype)) +
geom_point(alpha = 0.6, color = "blue") +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = paste(trait_names[i]),
subtitle = paste("r =", round(correlations[i], 3),
"| h² =", h2[i]),
x = "Genetic Value (Breeding Value)",
y = "Phenotype") +
theme_minimal() +
theme(plot.title = element_text(size = 12, hjust = 0.5))
plot_list[[i]] = p
}
# Arrange plots
do.call(grid.arrange, c(plot_list, ncol = 3))

Phenotype Distributions Across Generations
# Prepare data for plotting distributions across generations
# Fix the trait assignment
dist_data <- data.frame(
Generation = c(rep("Parents", nrow(pheno_parents) * n_traits),
rep("F1", nrow(pheno_F1) * n_traits),
rep("F2", nrow(pheno_F2) * n_traits)),
Trait = c(rep(trait_names, nrow(pheno_parents)),
rep(trait_names, nrow(pheno_F1)),
rep(trait_names, nrow(pheno_F2))),
Phenotype = c(as.vector(t(pheno_parents)),
as.vector(t(pheno_F1)),
as.vector(t(pheno_F2)))
)
# Set generation as factor with proper order
dist_data$Generation <- factor(dist_data$Generation,
levels = c("Parents", "F1", "F2"))
print("Distribution data prepared")
## [1] "Distribution data prepared"
print("Summary of phenotype distributions:")
## [1] "Summary of phenotype distributions:"
summary_stats <- dist_data %>%
group_by(Generation, Trait) %>%
summarise(
Mean = mean(Phenotype),
SD = sd(Phenotype),
Min = min(Phenotype),
Max = max(Phenotype),
.groups = "drop"
)
kable(summary_stats, digits = 3, caption = "Phenotype Summary Statistics by Generation")
Phenotype Summary Statistics by Generation
| Parents |
Establishment |
-0.132 |
1.212 |
-2.928 |
1.492 |
| Parents |
Flowering_Prob |
-0.456 |
1.793 |
-3.990 |
2.652 |
| Parents |
Fruit_Per_Plant |
0.035 |
1.280 |
-1.917 |
2.643 |
| Parents |
Germination |
0.003 |
1.090 |
-1.873 |
2.081 |
| Parents |
Growth_Rate |
-0.048 |
1.427 |
-2.748 |
2.183 |
| Parents |
Initial_Size |
-0.055 |
1.594 |
-3.357 |
2.607 |
| F1 |
Establishment |
0.011 |
1.163 |
-3.258 |
3.290 |
| F1 |
Flowering_Prob |
-0.024 |
1.777 |
-5.699 |
5.031 |
| F1 |
Fruit_Per_Plant |
0.042 |
1.689 |
-4.154 |
5.711 |
| F1 |
Germination |
-0.022 |
1.330 |
-3.338 |
5.612 |
| F1 |
Growth_Rate |
0.031 |
1.596 |
-4.869 |
5.183 |
| F1 |
Initial_Size |
0.013 |
1.376 |
-4.142 |
3.763 |
| F2 |
Establishment |
0.012 |
1.174 |
-3.862 |
3.028 |
| F2 |
Flowering_Prob |
0.000 |
1.808 |
-5.844 |
6.216 |
| F2 |
Fruit_Per_Plant |
0.040 |
1.640 |
-5.369 |
4.788 |
| F2 |
Germination |
-0.001 |
1.362 |
-3.794 |
4.528 |
| F2 |
Growth_Rate |
-0.034 |
1.698 |
-5.498 |
5.016 |
| F2 |
Initial_Size |
-0.046 |
1.378 |
-4.903 |
4.521 |
Density Plots for Better Comparison
# Create density plots for better comparison across generations
density_plots = list()
for(i in 1:n_traits) {
trait_subset = dist_data[dist_data$Trait == trait_names[i], ]
p = ggplot(trait_subset, aes(x = Phenotype, color = Generation, fill = Generation)) +
geom_density(alpha = 0.3, size = 1) +
scale_color_manual(values = c("Parents" = "red", "F1" = "green", "F2" = "blue")) +
scale_fill_manual(values = c("Parents" = "red", "F1" = "green", "F2" = "blue")) +
labs(title = paste("Phenotype Density:", trait_names[i]),
x = "Phenotype Value",
y = "Density") +
theme_minimal() +
theme(legend.position = "bottom")
density_plots[[i]] = p
}
# Arrange density plots
do.call(grid.arrange, c(density_plots, ncol = 3))

Variance Components Analysis
# Calculate variance components for each trait in each generation
variance_analysis <- data.frame(
Trait = rep(trait_names, 3),
Generation = rep(c("Parents", "F1", "F2"), each = n_traits),
Phenotypic_Variance = c(apply(pheno_parents, 2, var),
apply(pheno_F1, 2, var),
apply(pheno_F2, 2, var)),
Genetic_Variance = c(apply(gv_parents, 2, var),
apply(gv_F1, 2, var),
apply(gv_F2, 2, var))
)
# Calculate environmental variance
variance_analysis$Environmental_Variance <- variance_analysis$Phenotypic_Variance -
variance_analysis$Genetic_Variance
# Calculate realized heritability
variance_analysis$Realized_Heritability <- variance_analysis$Genetic_Variance /
variance_analysis$Phenotypic_Variance
kable(variance_analysis, digits = 3,
caption = "Variance Components Analysis by Generation")
Variance Components Analysis by Generation
| Germination |
Parents |
1.188 |
1.053 |
0.136 |
0.886 |
| Initial_Size |
Parents |
2.542 |
1.053 |
1.489 |
0.414 |
| Establishment |
Parents |
1.470 |
1.053 |
0.417 |
0.716 |
| Growth_Rate |
Parents |
2.035 |
1.053 |
0.983 |
0.517 |
| Flowering_Prob |
Parents |
3.216 |
1.053 |
2.163 |
0.327 |
| Fruit_Per_Plant |
Parents |
1.638 |
1.053 |
0.586 |
0.642 |
| Germination |
F1 |
1.768 |
1.173 |
0.596 |
0.663 |
| Initial_Size |
F1 |
1.892 |
0.965 |
0.928 |
0.510 |
| Establishment |
F1 |
1.354 |
0.993 |
0.361 |
0.733 |
| Growth_Rate |
F1 |
2.546 |
1.077 |
1.469 |
0.423 |
| Flowering_Prob |
F1 |
3.156 |
0.883 |
2.273 |
0.280 |
| Fruit_Per_Plant |
F1 |
2.853 |
1.256 |
1.597 |
0.440 |
| Germination |
F2 |
1.856 |
1.295 |
0.561 |
0.698 |
| Initial_Size |
F2 |
1.900 |
0.920 |
0.980 |
0.484 |
| Establishment |
F2 |
1.378 |
0.931 |
0.447 |
0.676 |
| Growth_Rate |
F2 |
2.882 |
1.270 |
1.611 |
0.441 |
| Flowering_Prob |
F2 |
3.270 |
0.870 |
2.401 |
0.266 |
| Fruit_Per_Plant |
F2 |
2.688 |
1.346 |
1.342 |
0.501 |
# Plot variance components
variance_long = variance_analysis %>%
select(Trait, Generation, Genetic_Variance, Environmental_Variance) %>%
pivot_longer(cols = c("Genetic_Variance", "Environmental_Variance"),
names_to = "Variance_Component",
values_to = "Variance")
ggplot(variance_long, aes(x = Generation, y = Variance, fill = Variance_Component)) +
geom_bar(stat = "identity", position = "stack") +
facet_wrap(~Trait, scales = "free_y") +
scale_fill_manual(values = c("Genetic_Variance" = "lightblue",
"Environmental_Variance" = "lightcoral")) +
labs(title = "Variance Components Across Generations",
x = "Generation",
y = "Variance",
fill = "Variance Component") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Heritability Validation
# Compare expected vs realized heritabilities
heritability_comparison <- data.frame(
Trait = trait_names,
Expected_h2 = h2,
Realized_h2_Parents = variance_analysis$Realized_Heritability[1:n_traits],
Realized_h2_F1 = variance_analysis$Realized_Heritability[(n_traits+1):(2*n_traits)],
Realized_h2_F2 = variance_analysis$Realized_Heritability[(2*n_traits+1):(3*n_traits)]
)
kable(heritability_comparison, digits = 3,
caption = "Expected vs Realized Heritabilities")
Expected vs Realized Heritabilities
| Germination |
Germination |
0.6 |
0.886 |
0.663 |
0.698 |
| Initial_Size |
Initial_Size |
0.5 |
0.414 |
0.510 |
0.484 |
| Establishment |
Establishment |
0.7 |
0.716 |
0.733 |
0.676 |
| Growth_Rate |
Growth_Rate |
0.4 |
0.517 |
0.423 |
0.441 |
| Flowering_Prob |
Flowering_Prob |
0.3 |
0.327 |
0.280 |
0.266 |
| Fruit_Per_Plant |
Fruit_Per_Plant |
0.4 |
0.642 |
0.440 |
0.501 |
# Plot heritability comparison
herit_long <- heritability_comparison |>
pivot_longer(cols = -Trait, names_to = "Heritability_Type", values_to = "Heritability")
ggplot(herit_long, aes(x = Trait, y = Heritability, fill = Heritability_Type)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = c("Expected_h2" = "gold",
"Realized_h2_Parents" = "red",
"Realized_h2_F1" = "green",
"Realized_h2_F2" = "blue")) +
labs(title = "Expected vs Realized Heritabilities",
x = "Trait",
y = "Heritability",
fill = "Type") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

Size-Dependent Reproductive Traits
# Calculate final size based on initial size and growth rate
trait_data$Final_Size = trait_data$Initial_Size * trait_data$Growth_Rate
# Size-dependent flowering probability
# Larger plants have higher flowering probability
size_effect_flowering = (trait_data$Final_Size - min(trait_data$Final_Size)) /
(max(trait_data$Final_Size) - min(trait_data$Final_Size))
trait_data$Flowering_Prob = trait_data$Flowering_Prob_base * (0.5 + 0.5 * size_effect_flowering)
trait_data$Flowering_Prob = pmin(trait_data$Flowering_Prob, 0.95) # Cap at 95%
# Size-dependent fruit production
# Only flowering plants produce fruit, and larger plants produce more
trait_data$Flowers = rbinom(nrow(trait_data), 1, trait_data$Flowering_Prob)
size_effect_fruit = (trait_data$Final_Size - min(trait_data$Final_Size)) /
(max(trait_data$Final_Size) - min(trait_data$Final_Size))
trait_data$Fruit_Per_Plant = trait_data$Flowers *
(trait_data$Fruit_Per_Plant_base * (0.3 + 0.7 * size_effect_fruit))
print("Size-dependent traits calculated")
## [1] "Size-dependent traits calculated"
print(paste("Proportion flowering:", round(mean(trait_data$Flowers), 3)))
## [1] "Proportion flowering: 0.347"
print(paste("Mean fruits per flowering plant:",
round(mean(trait_data$Fruit_Per_Plant[trait_data$Flowers == 1]), 2)))
## [1] "Mean fruits per flowering plant: 4.04"
Visualization
# Create summary statistics
summary_stats = trait_data |>
get_summary_stats(type = "common")
# Print summary
kable(t(summary_stats), digits = 3, caption = "Trait Summary Statistics")
Trait Summary Statistics
| variable |
Germination |
Initial_Size |
Establishment |
Growth_Rate |
Flowering_Prob |
Fruit_Per_Plant |
Flowering_Prob_base |
Fruit_Per_Plant_base |
Individual |
Final_Size |
Flowers |
| n |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
1000 |
| min |
0.163 |
3.090 |
0.243 |
1.348 |
0.180 |
0.000 |
0.319 |
2.158 |
1.000 |
6.222 |
0.000 |
| max |
0.872 |
20.219 |
0.751 |
5.011 |
0.539 |
14.296 |
0.711 |
29.188 |
1000.000 |
66.499 |
1.000 |
| median |
0.497 |
7.301 |
0.506 |
2.738 |
0.308 |
0.000 |
0.500 |
7.576 |
500.500 |
20.235 |
0.000 |
| iqr |
0.186 |
2.958 |
0.126 |
0.893 |
0.065 |
2.702 |
0.097 |
4.852 |
499.500 |
9.868 |
1.000 |
| mean |
0.498 |
7.693 |
0.502 |
2.800 |
0.312 |
1.401 |
0.500 |
8.436 |
500.500 |
21.452 |
0.347 |
| sd |
0.132 |
2.273 |
0.093 |
0.636 |
0.050 |
2.295 |
0.069 |
4.134 |
288.819 |
7.802 |
0.476 |
| se |
0.004 |
0.072 |
0.003 |
0.020 |
0.002 |
0.073 |
0.002 |
0.131 |
9.133 |
0.247 |
0.015 |
| ci |
0.008 |
0.141 |
0.006 |
0.039 |
0.003 |
0.142 |
0.004 |
0.257 |
17.923 |
0.484 |
0.030 |
# Plot 1: Distribution of key traits
p1 = ggplot(trait_data, aes(x = Germination)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "skyblue") +
labs(title = "Germination Rate Distribution", x = "Germination Probability", y = "Count") +
theme_doc()
p2 = ggplot(trait_data, aes(x = Final_Size)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "lightgreen") +
labs(title = "Final Size Distribution", x = "Final Size (mm)", y = "Count") +
theme_doc()
p3 = ggplot(trait_data, aes(x = Flowering_Prob)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "orange") +
labs(title = "Flowering Probability Distribution", x = "Flowering Probability", y = "Count") +
theme_doc()
p4 = ggplot(trait_data, aes(x = Fruit_Per_Plant)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "pink") +
labs(title = "Fruit Production Distribution", x = "Fruits per Plant", y = "Count") +
theme_doc()
(p1 + p2)/( p3 + p4)

# Plot 2: Size-dependent relationships
p5 = ggplot(trait_data, aes(x = Final_Size, y = Flowering_Prob)) +
geom_point(alpha = 0.6, color = "blue") +
geom_smooth(method = "loess", color = "red") +
labs(title = "Size vs Flowering Probability",
x = "Final Size (mm)", y = "Flowering Probability") +
theme_doc()
p6 = ggplot(trait_data[trait_data$Flowers == 1, ],
aes(x = Final_Size, y = Fruit_Per_Plant)) +
geom_point(alpha = 0.6, color = "darkgreen") +
geom_smooth(method = "loess", color = "red") +
labs(title = "Size vs Fruit Production (Flowering Plants Only)",
x = "Final Size (mm)", y = "Fruits per Plant") +
theme_doc()
p5 + p6

# Plot 3: Trait correlations
cor_traits = trait_data |>
select(Germination, Initial_Size, Establishment, Growth_Rate,
Final_Size, Flowering_Prob, Fruit_Per_Plant) |>
cor()
# Create correlation heatmap
cor_df = as.data.frame(as.table(cor_traits))
names(cor_df) = c("Trait1", "Trait2", "Correlation")
ggplot(cor_df, aes(Trait1, Trait2, fill = Correlation)) +
geom_tile() +
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1)) +
theme_doc() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Trait Correlation Matrix")

Fitness and Selection Analysis
# Calculate composite fitness measure
# Fitness = Germination × Establishment × (Fruit_Per_Plant + 1)
trait_data$Fitness = trait_data$Germination *
trait_data$Establishment *
(trait_data$Fruit_Per_Plant + 1)
# Standardize fitness
trait_data$Fitness_Std = scale(trait_data$Fitness)[,1]
# Selection analysis
selection_summary = trait_data |>
summarise(
Mean_Fitness = mean(Fitness),
Var_Fitness = var(Fitness),
CV_Fitness = sd(Fitness)/mean(Fitness),
Prop_Reproductive = mean(Flowers),
Mean_Reproductive_Success = mean(Fruit_Per_Plant[Flowers == 1])
)
kable(selection_summary, digits = 3, caption = "Population Fitness Summary")
Population Fitness Summary
| 0.6 |
0.392 |
1.043 |
0.347 |
4.038 |
# Plot fitness distribution
ggplot(trait_data, aes(x = Fitness)) +
geom_histogram(bins = 30, alpha = 0.7, fill = "purple") +
geom_vline(xintercept = mean(trait_data$Fitness), color = "red", linetype = "dashed") +
labs(title = "Fitness Distribution",
subtitle = paste("Mean fitness =", round(mean(trait_data$Fitness), 3)),
x = "Composite Fitness", y = "Count") +
theme_doc()

Save Results
# Save the population data
#write.csv(trait_data, "data/F2_population_traits.csv", row.names = FALSE)
# Save genetic values matrix
#write.csv(gv_matrix, "data/F2_genetic_values.csv", row.names = FALSE)
print("Results saved to CSV files:")
## [1] "Results saved to CSV files:"
print("- F2_population_traits.csv: All trait values and derived measures")
## [1] "- F2_population_traits.csv: All trait values and derived measures"
print("- F2_genetic_values.csv: Raw genetic values from AlphaSimR")
## [1] "- F2_genetic_values.csv: Raw genetic values from AlphaSimR"
Summary
In this simulation we successfully generated an F2 population of 1000
individuals with the following characteristics:
Genetic Architecture: Each trait controlled by
100 QTL with exponentially decaying effect sizes, creating realistic
genetic architecture where few loci have large effects.
Life History Traits:
- Germination probability (0-1)
- Initial and final size (cm)
- Establishment probability (0-1)
- Growth rate multiplier
- Size-dependent flowering probability
- Size-dependent fruit production
Key assumptions:
- Larger plants have higher flowering probability and fruit
production
- Only flowering plants produce fruit
- Composite fitness incorporates multiple life stages